Simulating Interval-Censoring

Published

August 11, 2025

Simulation Parameters

Data Generating Processes

The effect sizes of the risk factors are similar to those of the UMOD SNP.

Other Parameters

# Break points of the time axis, 0 to 10 in steps of 0.1 (101 points);
# presumably the cut points for the piecewise-exponential pieces — TODO confirm
cut <- seq(0, 10, by = 0.1)
# Number of simulated individuals per replication
n <- 500

DGPs

# loghazard formula for sim_pexp and sim_weibull

## baseline hazard analysis

  # Baseline-only log-hazard formulas (no covariates).
  # The scaled dgamma(t, 8, 2) term gives the pexp DGP a non-monotone
  # baseline; the Weibull formula carries only the intercept here
  # (its shape presumably enters via the `shape` parameter below — TODO confirm).
  formula_pexp <- "~ -3.5 + dgamma(t, 8, 2) * 6"
  formula_weibull <- "~ -3.5"

## fixed effects analysis
  # Fixed-effects analysis: same baselines plus a log-hazard effect of -1.3
  # for covariate x1 (effect size chosen to resemble the UMOD SNP, per the
  # text above).
  formula_pexp <- "~ -3.5 + dgamma(t, 8, 2) * 6 -1.3*x1"
  formula_weibull <- "~ -3.5 -1.3*x1"

# Implicit log-hazard formula matching the sim_icenReg DGP: a Weibull
# log-hazard log(shape) - log(scale) + (shape - 1) * (log(t) - log(scale))
# written out with shape = 1.5 and scale = 4.
# Note: `<-` replaces `=` for top-level assignment (R style convention).
formula_icenReg <- "~log(1.5) - log(4) + (1.5 - 1) * (log(t) - log(4))"

# sim_pexp: interval-censoring settings for the piecewise-exponential DGP.
# Note: `<-` replaces `=` for top-level assignment (R style convention).
  visits_min <- 1   # minimum number of visits per individual
  visits_max <- 10  # maximum number of visits per individual
  # Censoring mechanisms compared (see "Censoring Mechanisms" below)
  ic_mechanism <- c("beta", "uniform", "equidistant")

# sim_weibull: Weibull DGP settings plus interval-censoring controls.
# Note: `<-` replaces `=` for top-level assignment (R style convention).
  shape <- 1.5      # Weibull shape parameter
  ic <- TRUE        # apply interval censoring to simulated event times
  visits_min <- 1   # minimum number of visits per individual
  visits_max <- 10  # maximum number of visits per individual
  # Censoring mechanisms compared (see "Censoring Mechanisms" below)
  ic_mechanism <- c("beta", "uniform", "equidistant")

# sim_icenReg: Weibull parameters and inspection scheme, matching
# formula_icenReg above (presumably forwarded to icenReg's Weibull
# simulator -- TODO confirm against the wrapper code).
# Note: `<-` replaces `=` for top-level assignment (R style convention).
  scale <- 4            # Weibull scale
  shape <- 1            # Weibull shape (shape = 1 is the exponential case)
  inspections <- 2      # number of inspection times per individual
  inspectLength <- 2.5  # length of each inspection window

Censoring Mechanisms

  # Interval-censor one individual using Beta-distributed visit times.
  # Draws a random visit count in [visits_min, visits_max], random Beta shape
  # parameters (|N(0, 1)| + 0.5 each), and places the visits at interior Beta
  # quantiles scaled to (0, max_time).
  # Returns c(time_ic_start, time_ic_stop, status_ic); status_ic is 0 when the
  # event time lies beyond the last visit.
  # NOTE(review): the argument `round` shadows base::round and the sibling
  # mechanisms call it `round_digits` -- consider renaming for consistency.
  interval_censor_individual_beta <- function(event_time, event_status, visits_min, visits_max, max_time, round) {
    n_visits <- sample(visits_min:visits_max, 1)
    draw <- mvtnorm::rmvnorm(1, mean = c(0, 0), sigma = diag(2))

    shape_a <- abs(draw[1]) + 0.5
    shape_b <- abs(draw[2]) + 0.5

    # Interior quantile probabilities (endpoints 0 and 1 dropped).
    probs <- seq(0, 1, length.out = n_visits + 2)
    probs <- probs[seq(2, n_visits + 1)]
    visit_times <- sort(max_time * qbeta(probs, shape1 = shape_a, shape2 = shape_b))

    if (!is.null(round)) {
      visit_times <- round(visit_times, round)
    }

    grid <- c(0, visit_times)
    idx <- findInterval(event_time, grid)

    if (idx == length(grid)) {
      # Event time exceeds the largest visit time: censored in the last interval.
      time_ic_start <- grid[idx - 1]
      time_ic_stop  <- grid[idx]
      status_ic <- 0
    } else {
      # Event time is bracketed by two consecutive grid points.
      time_ic_start <- grid[idx]
      time_ic_stop  <- grid[idx + 1]
      status_ic <- if (event_status == 1) 1 else 0
    }

    return(c(time_ic_start = time_ic_start,
            time_ic_stop = time_ic_stop,
            status_ic = status_ic))
  }

  # Interval-censor one individual using jittered, roughly uniform visit times.
  # Draws a random visit count, perturbs an equally spaced interior grid with
  # Gaussian noise (per-individual sd = |N(0, 1)| + 0.1), and keeps only times
  # strictly inside (0, max_time).
  # Returns c(time_ic_start, time_ic_stop, status_ic); status_ic is 0 when the
  # event time lies beyond the last visit.
  interval_censor_individual_uniform <- function(event_time, event_status, visits_min, visits_max, max_time, round_digits) {
    n_visits <- sample(visits_min:visits_max, 1)

    # Per-individual jitter scale, strictly positive.
    jitter_scale <- abs(rnorm(1, mean = 0, sd = 1)) + 0.1

    # Equally spaced interior times (endpoints dropped), then jittered.
    grid_interior <- seq(0, max_time, length.out = n_visits + 2)
    grid_interior <- grid_interior[seq(2, n_visits + 1)]
    visit_times <- grid_interior + rnorm(n_visits, mean = 0, sd = jitter_scale)

    # Keep only times strictly within (0, max_time), in increasing order.
    visit_times <- sort(visit_times[visit_times > 0 & visit_times < max_time])

    # Fallback: if jitter pushed every time out of range, visit once at max_time.
    if (length(visit_times) == 0) {
      visit_times <- max_time
    }

    if (!is.null(round_digits)) {
      visit_times <- round(visit_times, round_digits)
    }

    grid <- c(0, visit_times)
    idx <- findInterval(event_time, grid)

    if (idx == length(grid)) {
      # Event time exceeds the largest visit time: censored in the last interval.
      time_ic_start <- grid[idx - 1]
      time_ic_stop  <- grid[idx]
      status_ic <- 0
    } else {
      # Event time is bracketed by two consecutive grid points.
      time_ic_start <- grid[idx]
      time_ic_stop  <- grid[idx + 1]
      status_ic <- if (event_status == 1) 1 else 0
    }

    return(c(time_ic_start = time_ic_start,
            time_ic_stop = time_ic_stop,
            status_ic = status_ic))
  }

  # Interval-censor one individual on a fixed, equally spaced visit grid.
  #
  # Unlike the beta/uniform mechanisms, every individual shares the same grid
  # of visits_max equal-width intervals on [0, max_time]; visits_min is unused
  # here and kept only for a signature consistent with the sibling mechanisms.
  #
  # Args:
  #   event_time:   true event (or censoring) time.
  #   event_status: 1 = event observed, 0 = censored.
  #   visits_min:   unused (signature compatibility).
  #   visits_max:   number of equal-width intervals on [0, max_time].
  #   max_time:     end of the observation window.
  #   round_digits: digits to round break points to, or NULL for no rounding.
  #
  # Returns: named vector c(time_ic_start, time_ic_stop, status_ic).
  interval_censor_individual_equidistant <- function(event_time, event_status, visits_min, visits_max, max_time, round_digits) {
    breaks <- seq(0, max_time, length.out = visits_max + 1)

    if (!is.null(round_digits)) {
      breaks <- round(breaks, round_digits)
      # Rounding may create duplicate break points; drop them.
      breaks <- sort(unique(breaks))
    }

    interval_index <- findInterval(event_time, breaks, rightmost.closed = TRUE)

    # Clamp so a well-defined interval is always returned.
    interval_index <- min(max(interval_index, 1), length(breaks) - 1)

    time_ic_start <- breaks[interval_index]
    time_ic_stop  <- breaks[interval_index + 1]

    # Bug fix: an event time beyond the observation window used to keep its
    # original status after being clamped into the last interval, mislabeling
    # it as an observed event. Treat it as censored, matching the beta and
    # uniform mechanisms.
    if (event_time > breaks[length(breaks)]) {
      status_ic <- 0
    } else {
      status_ic <- event_status
    }

    return(c(time_ic_start = time_ic_start,
             time_ic_stop = time_ic_stop,
             status_ic = status_ic))
  }

Models

# Fits a PAM to a simulated instance. The body is folded out of this rendered
# document; only the signature is visible here.
# `data`/`job`/`instance` match the batchtools algorithm signature — TODO
# confirm against the source .qmd. `bs`/`k` look like mgcv spline basis type
# and basis dimension; `ic_point` selects which point of the censoring
# interval is used as the event time ("oracle" presumably uses the true time).
wrapper_pam <- function(
  data,
  job,
  instance,
  bs = "ps",
  k = 10,
  ic_point = c("mid", "end", "mid_end", "exact", "oracle")) {}


# Fits a Cox model to a simulated instance. Body folded out of this rendered
# document; signature parallels wrapper_pam but without spline settings and
# without the "mid_end" IC-point option.
wrapper_cox <- function(
  data,
  job,
  instance,
  ic_point = c("mid", "end", "exact", "oracle")) {}

# Fits a Weibull model to a simulated instance. Body folded out of this
# rendered document. `fct` chooses the fitting backend (flexsurvreg or
# survreg); the extra "adjustment" IC point presumably fits the
# interval-censored likelihood directly — TODO confirm.
wrapper_weibull <- function(
  data,
  job,
  instance,
  ic_point = c("mid", "end", "exact", "oracle", "adjustment"),
  fct = c("flexsurvreg", "survreg")
  ) {}

Fixed Effects

Coefficients

Piecewise Exponential DGP

Beta IC Mechanism

Uniform IC Mechanism

Equidistant IC Mechanism

Weibull DGP

Beta IC Mechanism

Uniform IC Mechanism

Equidistant IC Mechanism

Coverages

Piecewise Exponential DGP

Beta IC Mechanism

Uniform IC Mechanism

Equidistant IC Mechanism

Weibull DGP

Beta IC Mechanism

Uniform IC Mechanism

Equidistant IC Mechanism

Conclusion

  • Coverage of fixed effect hazards is as desired across all DGPs, IC mechanisms, algorithms, and IC points

Baseline Hazards

Illustration of individual baseline log-hazard estimations

Piecewise Exponential DGP

pexp beta pam mid

pexp uniform pam mid

pexp equidistant pam mid

Weibull DGP

weibull beta pam mid

weibull uniform pam mid

weibull equidistant pam mid

icenReg DGP

icenReg uniform pam mid

Illustration of individual baseline cumulative hazard estimations

Piecewise Exponential DGP

pexp beta pam mid

pexp uniform pam mid

pexp equidistant pam mid

Weibull DGP

weibull beta pam mid

weibull uniform pam mid

weibull equidistant pam mid

icenReg DGP

icenReg uniform pam mid

Coverage of Baseline Log-Hazards

Piecewise Exponential DGP

Beta IC Mechanism

Uniform IC Mechanism

Equidistant IC Mechanism

Weibull DGP

Beta IC Mechanism

Uniform IC Mechanism

Equidistant IC Mechanism

icenReg DGP

Uniform IC Mechanism

Coverage of Baseline Cumulative Hazards

Piecewise Exponential DGP

Beta IC Mechanism

Uniform IC Mechanism

Equidistant IC Mechanism

Weibull DGP

Beta IC Mechanism

Uniform IC Mechanism

Equidistant IC Mechanism

icenReg DGP

Uniform IC Mechanism

Coverage of Baseline Survival Functions

Piecewise Exponential DGP

Beta IC Mechanism

Uniform IC Mechanism

Equidistant IC Mechanism

Weibull DGP

Beta IC Mechanism

Uniform IC Mechanism

Equidistant IC Mechanism

icenReg DGP

Uniform IC Mechanism

Conclusion

  • If adjustment for IC is not possible, the interval midpoint is always a better choice than the interval endpoint
  • The coverage of baseline loghazards by PAMs is between 50% and 75%, depending on specifications
  • The coverage of baseline cumulative hazards by PAMs is much higher, between 85% and 97%, depending on specifications
  • The coverage of baseline survival functions by PAMs is similar to that of the baseline cumulative hazards
  • The stark difference between coverage of baseline loghazards versus baseline cumulative hazards is likely due to poor estimation / bias of loghazards for very small and very large t